### Delete if runs
# project_dir <- file.path("~/Desktop/tmp/wastewater-communities")
# metadata_dir <- file.path(project_dir, "/")
#
# dat_files <- c(
# env_meta = file.path(metadata_dir, "env_meta.csv"),
# seq_meta = file.path(metadata_dir, "seq_meta.csv"),
# id_key = file.path(metadata_dir, "id_keys.csv"),
# merge_id = file.path(metadata_dir, "merge_id_key.csv"),
# key_meta = file.path(metadata_dir, "key_meta.csv"),
# mr_blast_file = file.path(project_dir, "wastewater_blast.rdata")
# )
#
# # metadata
# meta_env_src <- read_csv(dat_files['env_meta'])
#
#
# # Loading blast based OTU count data.
# load(dat_files['mr_blast_file'])
#
# # Only including spray irrigation and treatment plant data in analysis.
# plantObj <- which((pData(wastewaterMRobj_blast)$study_type == "plant" &
# pData(wastewaterMRobj_blast)$Stage_name != "Influent post screening") |
# pData(wastewaterMRobj_blast)$Region_Site == "MA_SI1") %>%
# {wastewaterMRobj_blast[,.]} %>% cumNorm(p = 0.75)
#
# ww_meta <- plantObj %>% pData() %>%
# as.data.frame %>% mutate(Seq_ID = row.names(.)) %>%
# separate(Env_ID,c("Region","TP","Date","Stage"), sep = "_") %>%
# unite("WW_TP", Region,TP, sep = "_", remove = F) %>%
# mutate(plant_name = paste(ifelse(Region == "MA",
# "Mid-Atlantic",
# "Midwest"),
# ifelse(TP %in% c("TP1","TP2"),
# paste0("WW",TP),TP))) %>%
# select(Seq_ID, WW_TP, plant_name, Region, TP, Date, Stage, Stage_name,
# study_type, Season, StageID, StageID_all_plants)
# rownames(ww_meta) <- ww_meta$Seq_ID
plantObj <- readRDS("plantObj.rds")
ww_meta <- pData(plantObj)
count_dat <- metagenomeSeq::expSummary(plantObj)
count_dat$Seq_ID <- rownames(count_dat)
count_df <- left_join(ww_meta, count_dat)
count_tbl <- plantObj@assayData$counts %>% as_tibble()
count_tbl$OTU <- plantObj@featureData@data$OTU
count_tbl <- count_tbl %>% gather("sam","count",-OTU) %>%
mutate(count = if_else(count != 0, 1,0)) %>%
group_by(OTU) %>% summarise(n_sam = sum(count)) %>%
filter(n_sam == 1)
assigned_un_df <- count_tbl %>% left_join(plantObj@featureData@data) %>%
mutate(assinged_un = if_else(grepl("OTU_",Taxonomy), "unassigned","assigned")) %>%
group_by(assinged_un) %>% summarise(count = n())
# total seq count
count_total <- sum(count_df$libSize)
# total number of samples
num_samples <- nrow(count_df)
# average seq count per sample
avg_seq_sample <- count_total/num_samples
# number of unique assigned species level OTUs
n_unique_assigned_otus <- assigned_un_df$count[assigned_un_df$assinged_un == "assigned"]
# number of unique unassigned species level OTUs
n_unique_unassigned_otus <- assigned_un_df$count[assigned_un_df$assinged_un == "unassigned"]
Total sequence count: 6.11383710^{6}
Total number of samples: 65
Average sequence count per sample : 9.40590310^{4}
Unique OTUs are defined as OTUs only present in one sample.
Total number of unique assigned species OTUs: 1521 Total number of unique unassigned species OTUs: 347
sample_count_summary <- count_df %>% select(WW_TP, Stage) %>%
group_by(WW_TP, Stage) %>%
summarize(count = n())
sample_count_summary$count %>% sum()
## [1] 65
sample_count_summary %>% kable()
| WW_TP | Stage | count |
|---|---|---|
| MA_SI1 | AfterUVTreatment | 7 |
| MA_SI1 | BeforeUVTreatment | 6 |
| MA_SI1 | HoldingPondInlet | 7 |
| MA_SI1 | PumpHouseInlet | 8 |
| MA_TP1 | ActivatedSludgeReactor | 2 |
| MA_TP1 | Effluent | 3 |
| MA_TP1 | RawInfluent | 4 |
| MA_TP1 | SecondaryClarifier | 2 |
| MA_TP2 | ActivatedSludgeReactor | 2 |
| MA_TP2 | Effluent | 1 |
| MA_TP2 | RawInfluent | 2 |
| MA_TP2 | SecondaryClarifier | 2 |
| MW_TP1 | Effluent | 3 |
| MW_TP1 | PostAeration | 2 |
| MW_TP1 | RawInfluent | 3 |
| MW_TP1 | SecondaryClarifier | 2 |
| MW_TP2 | CellB | 4 |
| MW_TP2 | Effluent | 3 |
| MW_TP2 | RawInfluent | 2 |
SC- Hill number based coverage estimate, extrapolated rarefaction curve, considering the singleton to tentons (not sure this is a real word)? Coverage is the estimated OTU coverage calculated by dividing the number of observed OTUs by the Chao1 diversity estimate.
Chao1, is based on singletons and likely overestimates species diversity for 16S data. Therefore Chao1 based coverage estimates are conservative. Filtering samples with less than 100 sequences.
div_est <- plantObj %>% MRcounts() %>% DataInfo()
div_est$StageID <- factor(count_df$StageID)
p <- ggplot(div_est, aes(x = n, y = SC)) +
geom_point(aes(color = StageID)) + scale_x_log10() +
geom_vline(aes(xintercept = 100), linetype = 2, color = "grey 60") +
theme_bw() +
labs(x = "Number of Sequences",y = "Coverage") +
theme(legend.position = "bottom")
ggMarginal(p, type = "histogram", margins = "x")
Number of observed sequences compared to the estimated coverage. The vertical dotted line indicates the cut off for sample excluded from the analysis. Histogram at the top show the distribution of samples relative to the number of sequences per sample.
Excluding samples with less than 100 counts.
plantObj <- which(colSums(plantObj) > 100) %>% {plantObj[, .]}
## creating phyloseq object
plant_phy <-plantObj %>% MRexperiment2biom(norm = TRUE,log = TRUE) %>%
import_biom2()
colnames(plant_phy@tax_table) <- c("taxa_OTU","Taxonomy","Kingdom",
"Phylum","Class","Order",
"Family","Genus","Species")
plant_phy@tax_table <- plant_phy@tax_table[,-2]
Calculating Diversity Metrics Diversity metrics were calculated using OTU level count data.
waste_counts <- plantObj %>% MRcounts(norm =TRUE, log = TRUE)
ww_div <- data_frame(sampleID = colnames(waste_counts),
shannon = diversity(waste_counts, MARGIN = 2),
simpson = diversity(waste_counts, MARGIN = 2, index = "simpson"),
specnumber = specnumber(waste_counts,MARGIN = 2)) %>%
gather(diversity, metric, -sampleID)
ww_div_df <- ww_meta %>%
mutate(Stage = ifelse(Stage %in% c("RawInfluent","PostScreeningInfluent"),
"Influent",Stage)) %>%
right_join(ww_div, by = c("Seq_ID" ="sampleID"))
ww_div_df %>% filter(Stage == "Influent") %>%
ggplot() + geom_point(aes(x = plant_name, y = metric, color = plant_name)) +
facet_wrap(~diversity, scale = "free_y") +
theme_bw() + theme(axis.text.x = element_blank(), legend.position = "bottom") +
labs(x = "Wastewater Treatment Plant", y = "Diversity Metric", color = "")
Initially using a parametric model, ANOVA, to test for differences in alpha diversity by wastewater treatment plants. Based on the diagnostic plots the the residuals are not normally distributed with long tails. Due to the non-normally distributed residuals the non-parametric Kruskal-Wallis test was also used to test for statistically significant differences. The alpha diversity is not statistically significant when comparing influents among treatment plants.
ww_inf_df <- ww_div_df %>% filter(Stage == "Influent")
fit_aov <- function(df) aov(metric~WW_TP, data = df)
test_kruskal <- function(df) kruskal.test(df$metric,factor(df$WW_TP))
inf_div_test <- ww_inf_df %>% group_by(diversity) %>% nest() %>%
mutate(aov_mod = map(data, fit_aov), krusk = map(data, test_kruskal))
aov_summary <- inf_div_test$aov_mod %>% map_df(glance) %>%
rename(f_stat = statistic, f_p = p.value)
aov_summary$diversity <- inf_div_test$diversity
krusk_summary <- inf_div_test$krusk %>% map_df(glance) %>%
rename(kruskal = statistic, kruk_p = p.value) %>%
select(-parameter, -method)
krusk_summary$diversity <- inf_div_test$diversity
inf_div_test_tbl <- aov_summary %>%
select(diversity, logLik, AIC,BIC, f_stat, f_p) %>% left_join(krusk_summary)
inf_div_test_tbl %>% kable()
| diversity | logLik | AIC | BIC | f_stat | f_p | kruskal | kruk_p |
|---|---|---|---|---|---|---|---|
| shannon | -8.003281 | 26.00656 | 27.51949 | 0.4840213 | 0.7055600 | 2.900000 | 0.4073016 |
| simpson | 51.631243 | -93.26249 | -91.74956 | 0.5866456 | 0.6456501 | 2.900000 | 0.4073016 |
| specnumber | -77.682358 | 165.36472 | 166.87764 | 0.6685194 | 0.6015781 | 2.836364 | 0.4175487 |
ANOVA Diagnostic plots
div_metrics <- c("shannon","simpson","specnumber")
for(i in 1:3){
print(autoplot(inf_div_test$aov_mod[[i]]) +
ggtitle(paste0(div_metrics[i])))
}
mrexp_inf <- plantObj[, which(pData(plantObj)$Stage_name %in%
c("Raw influent",
"Influent post screening"))]
mrexp_inf <- aggregateByTaxonomy(mrexp_inf,
lvl = "genus",
alternate = TRUE,
norm = TRUE, log = TRUE)
pData(mrexp_inf)$plant_name <- pData(mrexp_inf)$plant_name %>% factor()
mrexp_inf <- cumNorm(mrexp_inf, p = 0.75)
mrexp_inf <- filterData(mrexp_inf, present = 7, depth = 1)
# mrexp_inf <- cumNorm(mrexp_inf, p = 0.75)
s <- normFactors(mrexp_inf)
pd <- pData(mrexp_inf)
settings <- zigControl(maxit = 1, verbose = FALSE)
mod <- model.matrix(~plant_name, data = pd)
colnames(mod) <- levels(pd$plant_name)
res = fitZig(obj <- mrexp_inf, mod = mod, control = settings)
zigFit = res$fit
fit2 = eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075)
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_inf)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>%
rownames_to_column(var = "genus") %>%
gather("Seq_ID", "Count",-genus) %>% left_join(pd)
Differentially abundance genus between treatment plant influent samples. Might want to exclude MW_TP2 since there is only one sample.
kable(top_tbl)
| Mid.Atlantic.WWTP1 | Mid.Atlantic.WWTP2 | Midwest.WWTP1 | Midwest.WWTP2 | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|---|---|
| Streptococcus | 3.7106349 | -2.2729514 | -1.4019061 | -1.5863193 | 7.881069 | 5.117024 | 296.3466 | 0e+00 | 3e-07 |
| Bifidobacterium | 4.2305666 | -1.6500646 | -0.8484295 | -1.8457110 | 5.031820 | 5.019435 | 242.1323 | 0e+00 | 3e-07 |
| Paracoccus | -0.2321056 | -1.2775004 | 0.0920867 | 0.2259361 | 12.279543 | 3.364678 | 182.6427 | 0e+00 | 7e-07 |
| Faecalibacterium | 2.6022519 | -1.7300736 | -0.3179924 | -1.2048628 | 4.864957 | 3.546670 | 177.8089 | 0e+00 | 7e-07 |
| Clostridium | 3.8715052 | 2.3704363 | 0.4583797 | 2.2030953 | 3.137254 | 5.674791 | 153.5457 | 0e+00 | 8e-07 |
| Blautia | 3.9436406 | -2.4829157 | -0.3428441 | -4.4061591 | 3.114230 | 3.867834 | 185.0868 | 0e+00 | 8e-07 |
| Eubacterium | 2.6387443 | -2.4973135 | -1.0345646 | -2.2384306 | 5.285341 | 3.241547 | 150.3832 | 0e+00 | 8e-07 |
| Propionibacterium | 0.8938151 | -0.1300985 | 0.5111170 | -0.9144029 | 7.847056 | 3.359342 | 139.4012 | 0e+00 | 9e-07 |
| Lactococcus | 2.6804230 | -1.2802727 | -0.7181955 | -1.8288987 | 3.957832 | 3.251467 | 138.0501 | 0e+00 | 9e-07 |
| Lachnoclostridium | 1.7521640 | 0.5113022 | -0.2986833 | -1.7004446 | 5.761949 | 3.378824 | 111.2265 | 1e-07 | 2e-06 |
count_df %>%
# mutate(plant_name = str_replace(plant_name, "Mid-Atlantic ", "MA-"),
# plant_name = str_replace(plant_name, "Midwest ", "MW-")) %>%
ggplot() +
geom_point(aes(x = plant_name, y = Count, color = plant_name)) +
geom_line(aes(x = plant_name, y = Count, color = plant_name)) +
facet_wrap(~genus,nrow = 2) +
theme_bw() + scale_y_log10() + labs(y = "Relative Abundance",
color = "Treatment Plant",
x = "Treatment Plant") +
theme(legend.position = "bottom",
axis.text.x = element_text(angle = 90),
strip.text =element_text(face = "italic"))
Need to check regarding limited number of effluent samples ….
ww_div_inf_eff <- ww_div_df %>% filter(Stage %in% c("Influent","Effluent")) %>%
group_by(plant_name, Stage, Date, diversity) %>%
summarise(metric = median(metric))
ww_div_inf_eff %>% ungroup() %>%
mutate(Stage = factor(Stage, levels = c("Influent","Effluent")),
Date = paste(plant_name, Date)) %>%
ggplot(aes(x = Stage, y = metric)) +
geom_path(aes(group = Date), color = "grey60") +
geom_point(aes(x = Stage, y = metric, color = plant_name)) +
facet_wrap(~diversity, scale = "free_y") +
labs(y = "Diversity Metric", color = "WWTP") + theme_bw() +
theme(legend.position = "bottom")
Using a paired t-test to compared the alpha diversity between influent and effluent samples, specnumber (number of observed OTUs) and shannon’s diversity index were significantly higher for influent compared to effluent. Shannon’s diversity index is close to the significance threshold of 0.05, and when accounting for multiple comparisons Shannon’s diversity index is no longer significant. The number of observed OTUs is biased by sequencing depth, as the effluent samples consistently have fewer reads than influent samples the observed difference is likely at least partially due to differences in sequencing depth.
inf_eff_paired <- ww_div_inf_eff %>% group_by(plant_name, Date, Stage, diversity) %>%
summarize(metric = median(metric)) %>% spread(Stage, metric) %>%
filter(!is.na(Influent), !is.na(Effluent))
inf_v_ef_test_df <- data_frame()
for(i in c("shannon","simpson","specnumber")){
inf_v_ef_test_df <- inf_eff_paired %>%
filter(diversity == i) %>%
{t.test(.$Influent, .$Effluent, paired = TRUE, alternative = "greater")} %>%
tidy() %>% mutate(diversity = i) %>% bind_rows(inf_v_ef_test_df)
}
inf_v_ef_test_df %>% select(diversity, estimate, statistic, p.value, parameter, conf.low, conf.high) %>% kable()
| diversity | estimate | statistic | p.value | parameter | conf.low | conf.high |
|---|---|---|---|---|---|---|
| specnumber | 1273.9000000 | 4.716943 | 0.0045962 | 4 | 698.1542891 | Inf |
| simpson | 0.0030069 | 1.228177 | 0.1433526 | 4 | -0.0022124 | Inf |
| shannon | 0.9979746 | 2.104947 | 0.0515381 | 4 | -0.0127536 | Inf |
mrexp_inf_eff <- plantObj[, which(pData(plantObj)$Stage_name %in%
c("Raw influent",
"Influent post screening","Effluent"))]
mrexp_inf_eff <- aggregateByTaxonomy(mrexp_inf_eff,
lvl = "genus",
alternate = TRUE,
norm = TRUE, log = TRUE)
pData(mrexp_inf_eff)$plant_name <- pData(mrexp_inf_eff)$plant_name %>% factor()
pData(mrexp_inf_eff)$Stage_name <- pData(mrexp_inf_eff)$Stage_name %>%
ifelse(. %in% c("Raw influent","Influent post screening"),"Influent",.) %>%
factor(levels = c("Influent","Effluent"))
mrexp_inf_eff <- cumNorm(mrexp_inf_eff, p = 0.75)
mrexp_inf_eff <- filterData(mrexp_inf_eff, present = 7, depth = 1)
# mrexp_inf_eff <- cumNorm(mrexp_inf_eff, p = 0.75)
s <- normFactors(mrexp_inf_eff)
pd <- pData(mrexp_inf_eff)
settings = zigControl(maxit = 1, verbose = FALSE)
mod = model.matrix(~Stage_name, data = pd)
colnames(mod) = levels(pd$Stage_name)
res = fitZig(obj = mrexp_inf_eff, mod = mod, control = settings)
zigFit = res$fit
fit2 <- eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075)
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_inf_eff)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>%
rownames_to_column(var = "genus") %>%
gather("Seq_ID", "Count",-genus) %>% left_join(pd) %>%
mutate(plant_date = paste0(Date, plant_name))
kable(top_tbl)
| Influent | Effluent | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|
| Trichococcus | 5.7475672 | -3.9840536 | 0.4676418 | 4.121937 | 111.78295 | 0 | 0 |
| Leucobacter | 2.1108296 | -3.1165125 | 5.5592729 | 3.060460 | 97.49628 | 0 | 0 |
| Hyphomicrobium | -0.1621981 | 2.1289837 | 3.5435518 | 2.229523 | 95.93467 | 0 | 0 |
| Pseudomonas | 10.7783844 | -1.8564032 | -5.5222311 | 7.379846 | 83.00545 | 0 | 0 |
| Streptococcus | 3.9734565 | -3.4186268 | 3.6933844 | 3.874773 | 83.89276 | 0 | 0 |
| Mycobacterium | 1.2411825 | 0.0716067 | 6.4994120 | 3.838430 | 82.40745 | 0 | 0 |
| Bifidobacterium | 3.3391492 | -4.3308347 | 5.4268273 | 3.532881 | 84.68735 | 0 | 0 |
| Clostridium | 3.5044912 | -3.8787399 | 7.0094263 | 4.699553 | 70.43494 | 0 | 0 |
| Lachnoclostridium | 1.3140308 | -3.8193525 | 6.6686699 | 2.374673 | 82.82946 | 0 | 0 |
| Propionibacterium | 1.2261816 | -3.7892910 | 6.8894766 | 2.374481 | 71.41008 | 0 | 0 |
ggplot(count_df) +
geom_path(aes(x = Stage_name, y = Count, group = plant_date),
color = "grey60") +
geom_point(aes(x = Stage_name, y = Count, color = plant_name)) +
facet_wrap(~genus, nrow = 2) +
theme_bw() + scale_y_log10() +
labs(y = "Relative Abundance", x = "Stage Name", color = "Treatment Plant") +
theme(legend.position = "bottom",
strip.text = element_text(face = "italic"))
Description of metrics based on Biological Diversity frontiers in measurement and assessment. Edited by Anne E. Magurran and Brian McGill
\(\Beta\) diversity was calculated using two different metrics.
Bray Curtis \(S_{BC}\)
\[ S_{BC} = 1 - \frac{\sum_{i=1}^S |M_{i1} - M_{i2}|}{\sum_{i=1}^S (M_{i1} + M_{i2})} \]
\(S\) is the total number of OTUs in samples 1 and 2.
Chao et al. 2006 Abundance-based similarity indices and their estimatation when there are unseen species in samples. Biometrics, 62, 361-371
Jaccard \(S_J\)
* Jaccard is an incidence based (presence/absence) \(\Beta\) diversity measure.
\[ S_J = \frac{S_{12}}{(S_1 + S_2 - S_{12})} \]
Section Objective
Look at all the samples together. Then look for interesting results and for these interesting results look at statistically significant differences.
For post-hoc comaprison most differences are spray irrigation stages.
Ordination Methods
Notes from Numerical Ecology Legendre and Legendra 3rd Edition p. 493
Summary of NMDS https://jonlefcheck.net/2012/10/24/nmds-tutorial-in-r/
Statistical Tests
ANOSIM - analysis of similarities, testing for similarities between groups, rank order of dissimilarity value based statistical test.
No post-hoc test for ANOSIM, testing for pairwise differences using betadisper.
Betadisper is an implementation of PERMDISP2, multivariate analgoue to Levene’s test for variance homogeneity.
Evaluates the variance or distance of samples from group centroid.
dist_methods <- unlist(distanceMethodList)[c(8,10)]
pl <- subset_samples(plant_phy, study_type == "plant")
pl_pcoa_list <- list()
pl_nmds_list <- list()
for (i in dist_methods) {
# Calculate distance matrix
iDist <- distance(pl, method = i)
# Calculate ordination
pl_pcoa_list[[i]] <- ordinate(pl, method = "PCoA", distance = iDist)
pl_nmds_list[[i]] <- ordinate(pl, method = "NMDS", distance = iDist)
}
# plot_ordination(pl, pl_pcoa_list[[1]], color= "Stage", title = "Bray Curtis PCoA") + theme_bw() + stat_ellipse()
# plot_ordination(pl, pl_nmds_list[[1]], color= "Stage", title = "Bray Curtis NMDS") + theme_bw() + stat_ellipse()
# stressplot(pl_nmds_list[[1]])
# plot_ordination(pl, pl_pcoa_list[[2]], color= "Stage", title = "Jaccard PCoA") + theme_bw()
# plot_ordination(pl, pl_nmds_list[[2]], color= "Stage", title = "Jaccard NMDS") + theme_bw()
# stressplot(pl_nmds_list[[2]])
iDist <- distance(pl, method = "bray")
anosim(iDist,grouping = factor(pl@sam_data$Stage))
##
## Call:
## anosim(dat = iDist, grouping = factor(pl@sam_data$Stage))
## Dissimilarity: bray
##
## ANOSIM statistic R: 0.5632
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
Treatment plant stages are significantly different testing for pairwise differences
## betadisper has a post-hoc test
iDist <- distance(pl, method = "bray")
beta_fit <- betadisper(iDist,group = factor(pl@sam_data$Stage))
anova(beta_fit) %>% tidy() %>% kable()
| term | df | sumsq | meansq | statistic | p.value |
|---|---|---|---|---|---|
| Groups | 5 | 0.3117790 | 0.0623558 | 11.31555 | 1.09e-05 |
| Residuals | 24 | 0.1322551 | 0.0055106 | NA | NA |
TukeyHSD(beta_fit)$group %>% as.data.frame() %>%
rownames_to_column(var = "comparison") %>%
filter(`p adj` < 0.075) %>% kable(digits = 3)
| comparison | diff | lwr | upr | p adj |
|---|---|---|---|---|
| PostAeration-ActivatedSludgeReactor | -0.529 | -0.785 | -0.272 | 0.000 |
| PostAeration-CellB | -0.399 | -0.680 | -0.118 | 0.002 |
| PostAeration-Effluent | -0.568 | -0.812 | -0.325 | 0.000 |
| RawInfluent-PostAeration | 0.489 | 0.249 | 0.730 | 0.000 |
| SecondaryClarifier-PostAeration | 0.483 | 0.231 | 0.734 | 0.000 |
si <- subset_samples(plant_phy, study_type == "spray")
si_pcoa_list <- list()
si_nmds_list <- list()
for (i in dist_methods) {
print(i)
# Calculate distance matrix
iDist <- distance(si, method = i)
# Calculate ordination
si_pcoa_list[[i]] <- ordinate(si, method = "PCoA", distance = iDist)
si_nmds_list[[i]] <- ordinate(si, method = "NMDS", distance = iDist)
}
## [1] "bray"
## Run 0 stress 0.08628405
## Run 1 stress 0.08628405
## ... Procrustes: rmse 9.610914e-07 max resid 2.007411e-06
## ... Similar to previous best
## Run 2 stress 0.3829659
## Run 3 stress 0.2558146
## Run 4 stress 0.08628405
## ... Procrustes: rmse 2.892117e-06 max resid 9.106018e-06
## ... Similar to previous best
## Run 5 stress 0.08628405
## ... New best solution
## ... Procrustes: rmse 1.062147e-06 max resid 1.871165e-06
## ... Similar to previous best
## Run 6 stress 0.09761258
## Run 7 stress 0.1074574
## Run 8 stress 0.1601557
## Run 9 stress 0.09761272
## Run 10 stress 0.08628405
## ... Procrustes: rmse 2.545378e-06 max resid 4.930609e-06
## ... Similar to previous best
## Run 11 stress 0.0964332
## Run 12 stress 0.1600244
## Run 13 stress 0.1074574
## Run 14 stress 0.1663974
## Run 15 stress 0.09643206
## Run 16 stress 0.08628405
## ... Procrustes: rmse 8.360176e-07 max resid 1.547145e-06
## ... Similar to previous best
## Run 17 stress 0.1609997
## Run 18 stress 0.1074574
## Run 19 stress 0.09636965
## Run 20 stress 0.0976128
## *** Solution reached
## [1] "jaccard"
## Run 0 stress 0.08628405
## Run 1 stress 0.08628405
## ... New best solution
## ... Procrustes: rmse 1.580754e-06 max resid 5.108713e-06
## ... Similar to previous best
## Run 2 stress 0.09761348
## Run 3 stress 0.3856545
## Run 4 stress 0.08628405
## ... Procrustes: rmse 3.412486e-06 max resid 1.333437e-05
## ... Similar to previous best
## Run 5 stress 0.09761607
## Run 6 stress 0.1074674
## Run 7 stress 0.08628405
## ... Procrustes: rmse 2.537932e-06 max resid 6.172675e-06
## ... Similar to previous best
## Run 8 stress 0.1601557
## Run 9 stress 0.09761644
## Run 10 stress 0.09628348
## Run 11 stress 0.1617694
## Run 12 stress 0.08628405
## ... Procrustes: rmse 2.067053e-06 max resid 5.999207e-06
## ... Similar to previous best
## Run 13 stress 0.08628405
## ... Procrustes: rmse 1.855991e-06 max resid 6.65927e-06
## ... Similar to previous best
## Run 14 stress 0.09636983
## Run 15 stress 0.08628405
## ... Procrustes: rmse 4.220688e-06 max resid 1.290558e-05
## ... Similar to previous best
## Run 16 stress 0.1601557
## Run 17 stress 0.08628405
## ... New best solution
## ... Procrustes: rmse 6.392372e-07 max resid 2.221082e-06
## ... Similar to previous best
## Run 18 stress 0.08628405
## ... Procrustes: rmse 1.161473e-06 max resid 3.827379e-06
## ... Similar to previous best
## Run 19 stress 0.09625276
## Run 20 stress 0.08628405
## ... Procrustes: rmse 1.961568e-06 max resid 4.142391e-06
## ... Similar to previous best
## *** Solution reached
#plot_ordination(si, si_pcoa_list[[1]], color= "Stage", title = "Bray Curtis PCoA") + theme_bw()
#plot_ordination(si, si_nmds_list[[1]], color= "Stage", title = "Bray Curtis NMDS") + theme_bw()
stressplot(si_nmds_list[[1]])
#plot_ordination(si, si_pcoa_list[[2]], color= "Stage", title = "Jaccard PCoA") + theme_bw()
#plot_ordination(si, si_nmds_list[[2]], color= "Stage", title = "Jaccard NMDS") + theme_bw()
stressplot(si_nmds_list[[2]])
Spray irrigation stages are significantly different.
## betadisper has a post-hoc test
iDist <- distance(pl, method = "bray")
beta_fit <- betadisper(iDist,group = factor(pl@sam_data$Stage))
anova(beta_fit) %>% tidy() %>% kable()
| term | df | sumsq | meansq | statistic | p.value |
|---|---|---|---|---|---|
| Groups | 5 | 0.3117790 | 0.0623558 | 11.31555 | 1.09e-05 |
| Residuals | 24 | 0.1322551 | 0.0055106 | NA | NA |
TukeyHSD(beta_fit)$group %>% as.data.frame() %>%
rownames_to_column(var = "comparison") %>%
filter(`p adj` < 0.075) %>% kable(digits = 3)
| comparison | diff | lwr | upr | p adj |
|---|---|---|---|---|
| PostAeration-ActivatedSludgeReactor | -0.529 | -0.785 | -0.272 | 0.000 |
| PostAeration-CellB | -0.399 | -0.680 | -0.118 | 0.002 |
| PostAeration-Effluent | -0.568 | -0.812 | -0.325 | 0.000 |
| RawInfluent-PostAeration | 0.489 | 0.249 | 0.730 | 0.000 |
| SecondaryClarifier-PostAeration | 0.483 | 0.231 | 0.734 | 0.000 |
ww_div_treat <- ww_div_df %>% filter(study_type == "plant") %>%
group_by(plant_name, Stage, StageID, Date, diversity) %>%
summarise(metric = median(metric))
ww_div_treat %>% ungroup() %>%
mutate(Stage = factor(Stage,levels = c("Influent","ActivatedSludgeReactor",
"PostAeration","SecondaryClarifier",
"CellB","Effluent")),
Date = paste(plant_name, Date)) %>%
ggplot(aes(x = Stage, y = metric)) +
geom_line(aes(group = Date), color = "grey60") +
geom_point(aes(x = Stage, y = metric)) +
facet_grid(diversity~plant_name, scale = "free") +
labs(y = "Diversity Metric", color = "WWTP") + theme_bw() +
theme(axis.text.x = element_text(angle = 90))
Test for stage differences only performed for Mid-Atlantic WWTP1, only treatment plant with replicates at each stage. None of the stages are significantly different from each other.
fit_aov <- function(df) aov(metric~Stage, data = df)
treat_div_test <- ww_div_treat %>% filter(plant_name == "Mid-Atlantic WWTP1") %>%
group_by(diversity) %>% nest() %>% mutate(aov_mod = map(data, fit_aov))
aov_summary <- treat_div_test$aov_mod %>% map_df(glance) %>%
rename(f_stat = statistic, f_p = p.value)
aov_summary$diversity <- inf_div_test$diversity
aov_summary %>% select(diversity, logLik, AIC,BIC, f_stat, f_p) %>% kable()
| diversity | logLik | AIC | BIC | f_stat | f_p |
|---|---|---|---|---|---|
| shannon | 2.735477 | 4.529047 | 6.518523 | 0.8182523 | 0.5236262 |
| simpson | 76.543857 | -143.087714 | -141.098238 | 0.5831169 | 0.6447918 |
| specnumber | -80.400346 | 170.800692 | 172.790168 | 0.8403800 | 0.5135556 |
ANOVA Diagnostic plots
div_metrics <- c("shannon","simpson","specnumber")
for(i in 1:3){
print(autoplot(treat_div_test$aov_mod[[i]]) +
ggtitle(paste0(div_metrics[i])))
}
mrexp <- aggregateByTaxonomy(plantObj,
lvl = "genus",
alternate = TRUE,
norm = TRUE, log = TRUE)
pData(mrexp)$plant_name <- pData(mrexp)$plant_name %>% factor()
pData(mrexp)$Stage_name <- pData(mrexp)$Stage_name %>%
ifelse(. %in% c("Raw influent","Influent post screening"),"Influent",.) %>%
factor(levels =c("Influent", "Activated Sludge Reactor", "Post aeration",
"Secondary clarifier", "Cell B","Effluent"))
plant_zigFit <- function(mrexp, plant){
mrexp_plant <- mrexp[,which(pData(mrexp)$plant_name == plant)]
pData(mrexp_plant)$Stage_name <- factor(pData(mrexp_plant)$Stage_name)
mrexp_plant <- cumNorm(mrexp_plant, p = 0.75)
mrexp_plant <- filterData(mrexp_plant,
present = floor(dims(mrexp_plant)[2]/2),
depth = 1)
# mrexp_plant <- cumNorm(mrexp_plant, p = 0.75)
pd <- pData(mrexp_plant)
settings = zigControl(maxit = 1, verbose = FALSE)
mod = model.matrix(~Stage_name, data = pd)
colnames(mod) = levels(pd$Stage_name)
res = fitZig(obj = mrexp_plant, mod = mod, control = settings)
zigFit = res$fit
finalMod = res$fit$design
fit2 = eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075)
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_plant)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>%
rownames_to_column(var = "genus") %>%
gather("Seq_ID", "Count",-genus) %>% left_join(pd)
list(count_df = count_df, top_tbl = top_tbl)
}
make_ww_tp_plot <- function(plant_zig){
ggplot(plant_zig$count_df) +
geom_point(aes(x = Stage_name, y = Count, color = Stage_name)) +
facet_wrap(~genus, nrow = 1) +
theme_bw() + scale_y_log10() +
labs(y = "Rel. Abu.", color = "Treatment Process") +
theme_bw() +
theme(axis.text.x = element_blank(),
axis.title.x = element_blank(),
strip.text = element_text(face = "italic"))
}
ww_tp <- pData(mrexp)$plant_name %>%
.[!grepl("SI", .)] %>% unique() %>% as.character()
ww_tp
## [1] "Mid-Atlantic WWTP1" "Midwest WWTP1" "Midwest WWTP2"
## [4] "Mid-Atlantic WWTP2"
ww_tp_plots <- ww_tp %>% set_names(ww_tp) %>%
map(plant_zigFit, mrexp = mrexp) %>% map(make_ww_tp_plot)
ggarrange(ww_tp_plots[[1]], ww_tp_plots[[4]],
ww_tp_plots[[2]], ww_tp_plots[[3]],
labels = c("A", "B", "C", "D"),
ncol = 1, nrow = 4,legend = "bottom")
ggsave("diff_abu_treat_process.tiff", width = 10, height = 8, dpi = 300)
for(i in ww_tp){
plant_zig <- plant_zigFit(mrexp, i)
print(paste("##### ",i))
print(kable(plant_zig$top_tbl))
}
[1] “##### Mid-Atlantic WWTP1”
| Influent | Activated.Sludge.Reactor | Secondary.clarifier | Effluent | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|---|---|
| Clostridium | 4.6550534 | 2.6882460 | -0.1768883 | 0.1049857 | 0.1038452 | 5.191797 | 407.3712 | 0 | 0 |
| Streptococcus | 4.4214939 | -3.9155657 | -4.1521202 | -4.1548620 | 5.1290678 | 4.365203 | 257.5267 | 0 | 0 |
| Psychrobacter | 9.8822174 | -4.7759110 | -1.6178157 | -5.9338067 | -4.2300135 | 5.003581 | 208.9112 | 0 | 0 |
| Blautia | 3.6712287 | -3.8071717 | -3.5503492 | -3.2899341 | 4.1688382 | 3.503733 | 204.7747 | 0 | 0 |
| Bifidobacterium | 4.5597415 | -3.5621441 | -3.6889689 | -3.8951910 | 3.7574605 | 4.042504 | 186.6297 | 0 | 0 |
| Mycobacterium | 2.3034496 | 0.9282716 | 0.1477461 | 0.5542501 | 3.6303122 | 4.450662 | 174.5037 | 0 | 0 |
| Hyphomicrobium | -0.3471074 | 1.8481416 | 2.1486294 | 0.6735379 | 5.2495607 | 3.166733 | 141.4506 | 0 | 0 |
| Legionella | 0.5239134 | 1.2837241 | 2.9426167 | 4.0826619 | 1.8743828 | 3.297784 | 145.0902 | 0 | 0 |
| Collinsella | 3.7740783 | -3.4214271 | -3.4582399 | -3.0507862 | 2.6595376 | 3.010166 | 122.9244 | 0 | 0 |
| Halomonas | 1.6081618 | -0.7032213 | -1.3810141 | -0.2411062 | 3.6602712 | 2.978726 | 116.0583 | 0 | 0 |
| [1] “##### Midwes | t WWTP1“ |
| Influent | Post.aeration | Secondary.clarifier | Effluent | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|---|---|
| Pseudomonas | -23.630533 | 0.105302 | -8.240840 | -20.4045226 | 77.719383 | 6.227538 | 61.95803 | 0 | 0 |
| Clostridium | 2.122473 | -5.399842 | -3.374173 | -2.2840493 | 8.759675 | 4.389424 | 41.76208 | 0 | 0 |
| Microbacterium | 1.379457 | -2.054133 | -1.623306 | -1.1874663 | 7.927465 | 4.291348 | 34.49602 | 0 | 0 |
| Halomonas | 12.769173 | 1.104469 | 3.076655 | 5.2344307 | -22.164137 | 4.303394 | 34.43027 | 0 | 0 |
| Trichococcus | -4.879886 | -3.818507 | -6.040672 | -11.0197100 | 27.417079 | 3.624221 | 31.67985 | 0 | 0 |
| Legionella | -1.242781 | 1.187796 | 1.363266 | 0.6326445 | 9.776053 | 3.959564 | 31.25445 | 0 | 0 |
| Acinetobacter | 2.482344 | -1.647600 | -3.285401 | -0.8741490 | 5.184652 | 3.997756 | 30.75649 | 0 | 0 |
| Streptococcus | -2.997110 | -5.007819 | -4.187472 | -8.1253405 | 21.395387 | 3.398336 | 29.64021 | 0 | 0 |
| Paracoccus | -0.816102 | -4.422484 | -3.479822 | -3.7610971 | 14.001570 | 3.650518 | 29.04346 | 0 | 0 |
| Bifidobacterium | -2.763916 | -4.975682 | -4.585847 | -8.0060442 | 20.686219 | 3.281978 | 28.11082 | 0 | 0 |
| [1] “##### Midwes | t WWTP2“ |
| Influent | Cell.B | Effluent | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|---|
| Pseudomonas | 12.9178954 | 2.1696692 | 4.0452340 | -18.694367 | 7.610422 | 144.71594 | 0 | 0 |
| Flavobacterium | 5.9596378 | 6.1110278 | 8.4128520 | -13.563307 | 6.114826 | 94.09863 | 0 | 0 |
| Massilia | 4.9578066 | 5.0081413 | 5.0723110 | -8.310680 | 5.525375 | 73.30007 | 0 | 0 |
| Psychrobacter | 8.4670006 | -2.7960862 | -0.3766897 | -6.180307 | 4.621402 | 57.07140 | 0 | 0 |
| Mycobacterium | 2.2069420 | 1.0629662 | 1.1102633 | 3.730231 | 4.631318 | 51.48076 | 0 | 0 |
| Janthinobacterium | 7.9092202 | -3.0516411 | -0.7035015 | -4.968901 | 4.335694 | 50.52536 | 0 | 0 |
| Chryseobacterium | 9.8236404 | 0.0163297 | 1.3037310 | -15.534458 | 3.875554 | 47.46053 | 0 | 0 |
| Leifsonia | 0.2850388 | 3.9970141 | 3.4387455 | 1.608575 | 3.929936 | 42.24056 | 0 | 0 |
| Sporosarcina | 2.4634713 | 11.3966017 | 11.9956610 | -16.587045 | 4.905456 | 40.36015 | 0 | 0 |
| Carnobacterium | 8.9967568 | -0.7213852 | 3.3700771 | -17.488266 | 2.765606 | 38.88950 | 0 | 0 |
| [1] “##### Mid-Atla | ntic WWTP2“ |
| Influent | Activated.Sludge.Reactor | Secondary.clarifier | Effluent | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|---|---|
| Pseudomonas | 15.3533564 | -8.293934 | -0.2650290 | -6.0221102 | -17.1186409 | 5.602349 | 161.39890 | 0 | 0 |
| Massilia | 0.5722508 | -1.261907 | 0.8698702 | 0.0849137 | 11.8001019 | 4.686981 | 114.59145 | 0 | 0 |
| Clostridium | 11.6378758 | -6.372408 | -0.6589032 | -9.0803038 | -11.5118755 | 3.960429 | 97.46035 | 0 | 0 |
| Sphingomonas | 2.7455555 | -3.374304 | 0.0378776 | 0.0963023 | 6.9253092 | 4.295542 | 95.36705 | 0 | 0 |
| Janthinobacterium | 9.0873303 | -8.581755 | -3.6969893 | -8.6307795 | -2.0549787 | 3.590749 | 94.78396 | 0 | 0 |
| Deinococcus | 0.4011576 | -1.057163 | 0.6554114 | -2.7612105 | 10.6228229 | 3.688549 | 82.65160 | 0 | 0 |
| Flavobacterium | 6.8842755 | -6.816087 | -2.6880756 | -2.5224507 | -0.2771630 | 3.706567 | 77.00756 | 0 | 0 |
| Halomonas | -1.5140567 | 4.301894 | 0.4595413 | 5.2666151 | 8.8787726 | 3.862520 | 76.70902 | 0 | 0 |
| Pedobacter | 4.0860873 | -4.081131 | -1.2957918 | -1.9527985 | 4.4869372 | 3.854354 | 76.42857 | 0 | 0 |
| Chryseobacterium | 6.8537808 | -6.678815 | -1.7132320 | -6.6957809 | -0.7111724 | 3.238090 | 71.54076 | 0 | 0 |
spray_div <- ww_div_df %>% filter( WW_TP %in% c("MA_TP1","MA_SI1")) %>%
filter(Stage %in% c("Influent","Effluent", "BeforeUVTreatment",
"AfterUVTreatment","HoldingPondInlet",
"PumpHouseInlet")) %>%
group_by(plant_name, Stage, StageID, Date, diversity) %>%
summarise(metric = median(metric))
spray_div %>% ungroup() %>%
mutate(Stage = factor(Stage,levels = c("Influent","Effluent","BeforeUVTreatment",
"AfterUVTreatment","HoldingPondInlet",
"PumpHouseInlet"))) %>%
ggplot(aes(x = Stage, y = metric)) +
geom_line(aes(group = Date), color = "grey60") +
geom_point(aes(x = Stage, y = metric)) +
facet_grid(diversity~., scale = "free") +
labs(y = "Diversity Metric", color = "WWTP") + theme_bw() +
theme(axis.text.x = element_text(angle = 90))
For shannon and OTU number there are significant differences between influent, effluent, and spray irrigation stages.
fit_aov <- function(df) aov(metric~Stage, data = df)
spray_div_test <- spray_div %>% group_by(diversity) %>%
nest() %>% mutate(aov_mod = map(data, fit_aov))
aov_summary <- spray_div_test$aov_mod %>% map_df(glance) %>%
rename(f_stat = statistic, f_p = p.value)
aov_summary$diversity <- spray_div_test$diversity
aov_summary %>% select(diversity, logLik, AIC,BIC, f_stat, f_p) %>% kable()
| diversity | logLik | AIC | BIC | f_stat | f_p |
|---|---|---|---|---|---|
| shannon | -22.1850 | 58.3700 | 67.17668 | 3.569642 | 0.0180853 |
| simpson | 117.2448 | -220.4895 | -211.68283 | 1.692608 | 0.1824067 |
| specnumber | -191.8790 | 397.7580 | 406.56472 | 6.154450 | 0.0013136 |
ANOVA Diagnostic plots
div_metrics <- c("shannon","simpson","specnumber")
for(i in 1:3){
print(autoplot(spray_div_test$aov_mod[[i]]) +
ggtitle(paste0(div_metrics[i])))
}
Testing for pairwise differences.
for(i in c("shannon", "specnumber")){
print(i)
tukey <- spray_div %>% filter(diversity == i) %>% {aov(metric~Stage, data = .)} %>% TukeyHSD()
tidy(tukey) %>% filter(adj.p.value < 0.1) %>% print()
}
## [1] "shannon"
## term comparison estimate conf.low conf.high
## 1 Stage Influent-HoldingPondInlet 1.374929 0.06097935 2.68888
## adj.p.value
## 1 0.03698308
## [1] "specnumber"
## term comparison estimate conf.low conf.high adj.p.value
## 1 Stage Influent-AfterUVTreatment 1271.125 287.8470 2254.403 0.006898620
## 2 Stage Influent-BeforeUVTreatment 1161.500 178.2220 2144.778 0.014936608
## 3 Stage Influent-HoldingPondInlet 1384.083 486.4774 2281.689 0.001193148
mrexp_spray <- plantObj[,which(pData(plantObj)$WW_TP %in% c("MA_TP1","MA_SI1") &
pData(plantObj)$Stage %in% c("RawInfluent","PostScreeningInfluent","Effluent",
"BeforeUVTreatment", "AfterUVTreatment",
"HoldingPondInlet","PumpHouseInlet"))]
pData(mrexp_spray)$Stage_name <- pData(mrexp_spray)$Stage %>%
ifelse(. %in% c("RawInfluent","PostScreeningInfluent"),"Influent",.) %>%
factor(levels =c("Influent","Effluent", "BeforeUVTreatment", "AfterUVTreatment",
"HoldingPondInlet","PumpHouseInlet"))
mrexp_spray <- aggregateByTaxonomy(mrexp_spray,
lvl = "genus",
alternate = TRUE,
norm = TRUE, log = TRUE)
mrexp_spray <- cumNorm(mrexp_spray, p = 0.75)
mrexp_spray <- filterData(mrexp_spray, present = 7, depth = 1)
# mrexp_spray <- cumNorm(mrexp_spray, p = 0.75)
s <- normFactors(mrexp_spray)
pd <- pData(mrexp_spray)
settings = zigControl(maxit = 1, verbose = FALSE)
mod = model.matrix(~Stage_name, data = pd)
colnames(mod) = levels(pd$Stage_name)
res = fitZig(obj = mrexp_spray, mod = mod, control = settings)
zigFit = res$fit
fit2 <- eBayes(zigFit)
top_tbl <- topTableF(fit2,p.value = 0.075)
top_feat <- top_tbl %>% rownames()
count_tbl <- MRcounts(mrexp_spray)
count_tbl <- count_tbl[rownames(count_tbl) %in% top_feat,]
count_df <- count_tbl %>% as.data.frame() %>%
rownames_to_column(var = "genus") %>%
gather("Seq_ID", "Count",-genus) %>% left_join(pd) %>%
mutate(Stage = factor(Stage,levels = c("RawInfluent","Effluent","BeforeUVTreatment",
"AfterUVTreatment","HoldingPondInlet",
"PumpHouseInlet")))
kable(top_tbl)
| Influent | Effluent | BeforeUVTreatment | AfterUVTreatment | HoldingPondInlet | PumpHouseInlet | scalingFactor | AveExpr | F | P.Value | adj.P.Val | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| Clostridium | 3.5570660 | -1.6141927 | -1.9595022 | -1.0891890 | -1.7437475 | -1.3724789 | 4.3545657 | 4.223864 | 156.58071 | 0 | 0 |
| Mycobacterium | 2.0651934 | 0.1811994 | 0.0985015 | 0.8622324 | 0.5073317 | -0.4457880 | 4.5526913 | 4.427186 | 127.04375 | 0 | 0 |
| Shewanella | 1.7063515 | 1.3552535 | 2.4574504 | 1.8138119 | 2.3949668 | 1.0959432 | -0.0750964 | 3.244486 | 78.74273 | 0 | 0 |
| Halomonas | 3.1254354 | 2.1345714 | 4.1157692 | 3.0225705 | 4.1584830 | 1.7392195 | -2.2136631 | 4.687832 | 76.80866 | 0 | 0 |
| Streptococcus | 5.2933433 | -2.7897601 | -3.5008617 | -3.3645001 | -3.1790217 | -5.3471901 | 1.7538123 | 2.698989 | 85.32113 | 0 | 0 |
| Legionella | -0.5004655 | 2.6120910 | 2.0660638 | 2.8075892 | 1.6454463 | 0.2716772 | 5.6389230 | 3.580591 | 77.12593 | 0 | 0 |
| Gemmobacter | 0.4679140 | 0.6253217 | -0.3641536 | 0.3102889 | -0.5985290 | 1.6409083 | 2.8580978 | 2.124359 | 72.43150 | 0 | 0 |
| Ralstonia | 0.4161509 | 1.9270323 | 3.1197301 | 2.6767776 | 3.0156011 | 1.3647255 | -0.1557410 | 2.400292 | 65.62172 | 0 | 0 |
| Pseudomonas | 10.0054000 | -4.4181942 | -6.5006839 | -6.3951256 | -5.7633871 | -2.8711371 | -1.4186639 | 4.800067 | 61.90357 | 0 | 0 |
| Rhodobacter | 1.1178660 | -0.9660405 | -1.1823087 | -0.4506051 | -0.8821008 | 1.1792098 | 4.8011202 | 3.099284 | 65.67000 | 0 | 0 |
ggplot(count_df) +
geom_line(aes(x = Stage, y = Count, group = Date), color = "grey60") +
geom_point(aes(x = Stage, y = Count)) +
facet_wrap(~genus, nrow = 2) +
theme_bw() + scale_y_log10() + labs(y = "Relative Abundance") +
theme(axis.text.x = element_text(angle = 90))
import_biom2 function form joe_diversity_function.R script.
import_biom2
## function (x, treefilename = NULL, refseqfilename = NULL, refseqFunction = readDNAStringSet,
## refseqArgs = NULL, parseFunction = parse_taxonomy_default,
## parallel = FALSE, version = 1, ...)
## {
## argumentlist <- list()
## x = biom(x)
## b_data = biom_data(x)
## b_data_mat = as(b_data, "matrix")
## otutab = otu_table(b_data_mat, taxa_are_rows = TRUE)
## argumentlist <- c(argumentlist, list(otutab))
## if (all(sapply(sapply(x$rows, function(i) {
## i$metadata
## }), is.null))) {
## taxtab <- NULL
## }
## else {
## taxlist = lapply(x$rows, function(i) {
## parseFunction(i$metadata$taxonomy)
## })
## names(taxlist) = sapply(x$rows, function(i) {
## i$id
## })
## taxtab = build_tax_table(taxlist)
## }
## argumentlist <- c(argumentlist, list(taxtab))
## if (is.null(sample_metadata(x))) {
## samdata <- NULL
## }
## else {
## samdata = sample_data(sample_metadata(x))
## }
## argumentlist <- c(argumentlist, list(samdata))
## if (!is.null(treefilename)) {
## if (inherits(treefilename, "phylo")) {
## tree = treefilename
## }
## else {
## tree <- read_tree(treefilename, ...)
## }
## if (is.null(tree)) {
## warning("treefilename failed import. It not included.")
## }
## else {
## argumentlist <- c(argumentlist, list(tree))
## }
## }
## if (!is.null(refseqfilename)) {
## if (inherits(refseqfilename, "XStringSet")) {
## refseq = refseqfilename
## }
## else {
## if (!is.null(refseqArgs)) {
## refseq = do.call("refseqFunction", c(list(refseqfilename),
## refseqArgs))
## }
## else {
## refseq = refseqFunction(refseqfilename)
## }
## }
## argumentlist <- c(argumentlist, list(refseq))
## }
## return(do.call("phyloseq", argumentlist))
## }
s_info <- devtools::session_info()
print(s_info$platform)
## setting value
## version R version 3.4.4 (2018-03-15)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/New_York
## date 2018-05-05
kable(s_info$packages)
| package | * | version | date | source |
|---|---|---|---|---|
| ade4 | 1.7-11 | 2018-04-05 | CRAN (R 3.4.4) | |
| ape | 5.1 | 2018-04-04 | CRAN (R 3.4.4) | |
| assertthat | 0.2.0 | 2017-04-11 | CRAN (R 3.4.0) | |
| backports | 1.1.2 | 2017-12-13 | CRAN (R 3.4.3) | |
| base | * | 3.4.4 | 2018-03-15 | local |
| bindr | 0.1.1 | 2018-03-13 | CRAN (R 3.4.4) | |
| bindrcpp | * | 0.2.2 | 2018-03-29 | CRAN (R 3.4.4) |
| Biobase | * | 2.38.0 | 2017-10-31 | Bioconductor |
| BiocGenerics | * | 0.24.0 | 2017-10-31 | Bioconductor |
| biomformat | * | 1.6.0 | 2017-10-31 | Bioconductor |
| Biostrings | 2.46.0 | 2017-10-31 | Bioconductor | |
| bitops | 1.0-6 | 2013-08-17 | CRAN (R 3.4.0) | |
| broom | * | 0.4.4 | 2018-03-29 | CRAN (R 3.4.4) |
| caTools | 1.17.1 | 2014-09-10 | CRAN (R 3.4.0) | |
| cluster | 2.0.7 | 2018-04-01 | CRAN (R 3.4.4) | |
| codetools | 0.2-15 | 2016-10-05 | CRAN (R 3.4.4) | |
| colorspace | 1.3-2 | 2016-12-14 | CRAN (R 3.4.0) | |
| compiler | 3.4.4 | 2018-03-15 | local | |
| cowplot | 0.9.2 | 2017-12-17 | cran (@0.9.2) | |
| data.table | 1.10.4-3 | 2017-10-27 | cran (@1.10.4-) | |
| datasets | * | 3.4.4 | 2018-03-15 | local |
| devtools | * | 1.13.5 | 2018-02-18 | CRAN (R 3.4.3) |
| digest | 0.6.15 | 2018-01-28 | cran (@0.6.15) | |
| dplyr | * | 0.7.4 | 2017-09-28 | cran (@0.7.4) |
| DT | * | 0.4 | 2018-01-30 | cran (@0.4) |
| evaluate | 0.10.1 | 2017-06-24 | CRAN (R 3.4.1) | |
| foreach | * | 1.4.4 | 2017-12-12 | CRAN (R 3.4.3) |
| foreign | 0.8-69 | 2017-06-22 | CRAN (R 3.4.4) | |
| gdata | 2.18.0 | 2017-06-06 | CRAN (R 3.4.0) | |
| ggExtra | * | 0.8 | 2018-04-04 | CRAN (R 3.4.4) |
| ggfortify | * | 0.4.4 | 2018-04-18 | CRAN (R 3.4.4) |
| ggplot2 | * | 2.2.1.9000 | 2018-04-05 | Github (hadley/ggplot2@3c9c504) |
| ggpubr | * | 0.1.6 | 2017-11-14 | cran (@0.1.6) |
| glmnet | * | 2.0-16 | 2018-04-02 | CRAN (R 3.4.4) |
| glue | 1.2.0 | 2017-10-29 | cran (@1.2.0) | |
| gplots | 3.0.1 | 2016-03-30 | CRAN (R 3.4.0) | |
| graphics | * | 3.4.4 | 2018-03-15 | local |
| grDevices | * | 3.4.4 | 2018-03-15 | local |
| grid | 3.4.4 | 2018-03-15 | local | |
| gridExtra | 2.3 | 2017-09-09 | cran (@2.3) | |
| gtable | 0.2.0 | 2016-02-26 | CRAN (R 3.4.0) | |
| gtools | 3.5.0 | 2015-05-29 | CRAN (R 3.4.0) | |
| highr | 0.6 | 2016-05-09 | CRAN (R 3.4.0) | |
| hms | 0.4.2 | 2018-03-10 | CRAN (R 3.4.4) | |
| htmltools | 0.3.6 | 2017-04-28 | CRAN (R 3.4.0) | |
| htmlwidgets | 1.0 | 2018-01-20 | cran (@1.0) | |
| httpuv | 1.3.6.2 | 2018-03-02 | CRAN (R 3.4.3) | |
| httr | 1.3.1 | 2017-08-20 | CRAN (R 3.4.1) | |
| igraph | 1.2.1 | 2018-03-10 | CRAN (R 3.4.4) | |
| iNEXT | * | 2.0.12 | 2016-11-12 | CRAN (R 3.4.0) |
| IRanges | 2.12.0 | 2017-10-31 | Bioconductor | |
| iterators | 1.0.9 | 2017-12-12 | CRAN (R 3.4.3) | |
| jsonlite | 1.5 | 2017-06-01 | CRAN (R 3.4.0) | |
| KernSmooth | 2.23-15 | 2015-06-29 | CRAN (R 3.4.4) | |
| knitr | * | 1.20 | 2018-02-20 | CRAN (R 3.4.3) |
| labeling | 0.3 | 2014-08-23 | CRAN (R 3.4.0) | |
| lattice | * | 0.20-35 | 2017-03-25 | CRAN (R 3.4.4) |
| lazyeval | 0.2.1 | 2017-10-29 | cran (@0.2.1) | |
| limma | * | 3.34.9 | 2018-02-22 | Bioconductor |
| magrittr | * | 1.5 | 2014-11-22 | CRAN (R 3.4.0) |
| MASS | 7.3-49 | 2018-02-23 | CRAN (R 3.4.4) | |
| Matrix | * | 1.2-13 | 2018-04-02 | CRAN (R 3.4.4) |
| matrixStats | 0.53.1 | 2018-02-11 | cran (@0.53.1) | |
| memoise | 1.1.0 | 2017-04-21 | CRAN (R 3.4.0) | |
| metagenomeSeq | * | 1.20.1 | 2017-12-12 | Bioconductor |
| methods | * | 3.4.4 | 2018-03-15 | local |
| mgcv | 1.8-23 | 2018-01-21 | CRAN (R 3.4.4) | |
| mime | 0.5 | 2016-07-07 | CRAN (R 3.4.0) | |
| miniUI | 0.1.1 | 2016-01-15 | cran (@0.1.1) | |
| mnormt | 1.5-5 | 2016-10-15 | CRAN (R 3.4.0) | |
| multtest | 2.34.0 | 2017-10-31 | Bioconductor | |
| munsell | 0.4.3 | 2016-02-13 | CRAN (R 3.4.0) | |
| nlme | 3.1-131.1 | 2018-02-16 | CRAN (R 3.4.4) | |
| parallel | * | 3.4.4 | 2018-03-15 | local |
| permute | * | 0.9-4 | 2016-09-09 | CRAN (R 3.4.0) |
| phyloseq | * | 1.22.3 | 2017-11-07 | Bioconductor |
| pillar | 1.2.1 | 2018-02-27 | CRAN (R 3.4.3) | |
| pkgconfig | 2.0.1 | 2017-03-21 | CRAN (R 3.4.0) | |
| plotly | * | 4.7.1 | 2017-07-29 | CRAN (R 3.4.1) |
| plyr | 1.8.4 | 2016-06-08 | CRAN (R 3.4.0) | |
| psych | 1.8.3.3 | 2018-03-30 | CRAN (R 3.4.4) | |
| purrr | * | 0.2.4 | 2017-10-18 | CRAN (R 3.4.2) |
| R6 | 2.2.2 | 2017-06-17 | CRAN (R 3.4.0) | |
| RColorBrewer | * | 1.1-2 | 2014-12-07 | CRAN (R 3.4.0) |
| Rcpp | 0.12.16 | 2018-03-13 | cran (@0.12.16) | |
| readr | * | 1.1.1 | 2017-05-16 | CRAN (R 3.4.0) |
| reshape2 | 1.4.3 | 2017-12-11 | cran (@1.4.3) | |
| rhdf5 | 2.22.0 | 2017-10-31 | Bioconductor | |
| rlang | 0.2.0.9001 | 2018-04-05 | Github (r-lib/rlang@9c0637a) | |
| rmarkdown | 1.9 | 2018-03-01 | CRAN (R 3.4.3) | |
| rprojroot | 1.3-2 | 2018-01-03 | CRAN (R 3.4.3) | |
| S4Vectors | 0.16.0 | 2017-10-31 | Bioconductor | |
| scales | 0.5.0.9000 | 2018-04-05 | Github (hadley/scales@d767915) | |
| shiny | 1.0.5 | 2017-08-23 | CRAN (R 3.4.1) | |
| splines | 3.4.4 | 2018-03-15 | local | |
| stats | * | 3.4.4 | 2018-03-15 | local |
| stats4 | 3.4.4 | 2018-03-15 | local | |
| stringi | 1.1.7 | 2018-03-12 | CRAN (R 3.4.4) | |
| stringr | * | 1.3.0 | 2018-02-19 | cran (@1.3.0) |
| survival | 2.41-3 | 2017-04-04 | CRAN (R 3.4.4) | |
| tibble | * | 1.4.2 | 2018-01-22 | cran (@1.4.2) |
| tidyr | * | 0.8.0 | 2018-01-29 | cran (@0.8.0) |
| tidyselect | 0.2.4 | 2018-02-26 | cran (@0.2.4) | |
| tools | 3.4.4 | 2018-03-15 | local | |
| utils | * | 3.4.4 | 2018-03-15 | local |
| vegan | * | 2.4-6 | 2018-01-24 | CRAN (R 3.4.3) |
| viridisLite | 0.3.0 | 2018-02-01 | cran (@0.3.0) | |
| withr | 2.1.2 | 2018-04-05 | Github (jimhester/withr@79d7b0d) | |
| xtable | 1.8-2 | 2016-02-05 | CRAN (R 3.4.0) | |
| XVector | 0.18.0 | 2017-10-31 | Bioconductor | |
| yaml | 2.1.18 | 2018-03-08 | cran (@2.1.18) | |
| zlibbioc | 1.24.0 | 2017-10-31 | Bioconductor |